## Loading required package: grid

Feodo

En este Markdown se intenta representar el número de botnets activas. El dataset utilizado se actualiza cada 5 minutos.

La fuente de los datos utilizados es accesible desde el siguiente enlace: https://feodotracker.abuse.ch/.

Including Map

Mapa con la ubicación de las botnets.

library(maps)
library(dplyr)
library(leaflet)
library(Group4)
library(ggplot2)
library(RcmdrMisc)
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## Loading required package: sandwich
# Build the dataset with maxmindg4.df() only when no object named `df.feo` is
# already present in the session. exists() checks for an R object by name;
# file.exists() would instead look for a file called "df.feo" on disk, which
# says nothing about whether the variable is defined.
if (!exists("df.feo")) {
  df.feo <- maxmindg4.df()
}
## [1] "[*] Initial setup"
## [1] "[*] Read data from source"
##  chr "data.frame"
## Type of dataset :  
## Dimension(row x column) :  348 9 
## Current lenght:  9 & Object size: 57024 
## List of 9
##  $ DetectedDate   : chr [1:2] "POSIXct" "POSIXt"
##  $ DstIP          : chr "character"
##  $ DstPort        : chr "integer"
##  $ LastOnlineDate : chr [1:2] "POSIXct" "POSIXt"
##  $ Malware        : chr "character"
##  $ DetectedWeekday: chr "factor"
##  $ continent_name : chr "character"
##  $ country_code   : chr "character"
##  $ country_name   : chr "character"
## Types of dataset fields: Printing
##  
##  print done 
## Now let's see the values of all non-repeated fields
## 'data.frame':    348 obs. of  9 variables:
##  $ DetectedDate   : POSIXct, format: "2019-05-30 08:28:38" "2019-05-30 08:16:35" ...
##  $ DstIP          : chr  "185.244.149.206" "94.23.174.183" "185.61.149.38" "176.223.133.178" ...
##  $ DstPort        : int  447 447 447 447 443 447 447 447 443 447 ...
##  $ LastOnlineDate : POSIXct, format: "2019-06-01" "2019-06-02" ...
##  $ Malware        : chr  "TrickBot" "TrickBot" "TrickBot" "TrickBot" ...
##  $ DetectedWeekday: Factor w/ 7 levels "Friday","Monday",..: 5 5 5 5 5 5 7 7 7 7 ...
##  $ continent_name : chr  NA "Europe" "Europe" "Europe" ...
##  $ country_code   : chr  NA "CZ" "LV" "RO" ...
##  $ country_name   : chr  NA "Czech Republic" "Latvia" "Romania" ...
## Structure of the dataset fields: 
## [1] "[*] Read RAW data from MaxMind"
## [1] "[*] Subseting scans data set"
## [1] "[*] Expanding MaxMind network ranges"
## [1] "[*] Foreach IP (source and destination) identify network range using parallel computing"
## [1] "[*] Joining source IP's with geolocation data"
## [1] "[*] Tidy data and save it"
# Interactive point map: one circle per row of df.feo, positioned from its
# longitude/latitude columns, drawn over the default tile layer with the view
# centred on (lat 10, lng 0) at zoom level 2.
leaflet(data = df.feo) %>%
  addTiles() %>%
  setView(lng = 0, lat = 10, zoom = 2) %>%
  addCircles(lat = ~latitude, lng = ~longitude)
# Second map: world polygons from the `maps` package, filled with a
# 10-colour topo palette and no outline, with the df.feo coordinates drawn on
# top as red circles.
worldleafmap <- map(
  database = "world",
  fill = TRUE,
  plot = FALSE,
  wrap = c(-180, 180, NA)
)
worldmap <- addPolygons(
  addTiles(leaflet(worldleafmap)),
  stroke = FALSE,
  fillColor = topo.colors(10, alpha = NULL)
)
addCircles(
  worldmap,
  lat = df.feo$latitude,
  lng = df.feo$longitude,
  color = "#ff0000",
  radius = 50
)
# Bar chart of how many rows of df.feo belong to each Malware value.
barplot(
  table(df.feo[["Malware"]]),
  main = "Botnet type distribution",
  col = "red"
)

# Density curves for the detection and last-online timestamps.
# Built with ggplot() + geom_density() instead of qplot(), which is deprecated
# in current ggplot2. Mapping the columns by name (rather than df.feo$col)
# also yields plain axis labels such as "DetectedDate".

# Alternative kept for reference:
# hist(x = df.feo$DetectedDate, breaks = 10, col = "green", main = "detected date distribution")
ggplot(df.feo, aes(x = DetectedDate)) +
  geom_density(colour = "green")

# Alternative kept for reference:
# hist(x = df.feo$LastOnlineDate, breaks = 10, col = "blue", main = "Last online by date")
ggplot(df.feo, aes(x = LastOnlineDate)) +
  geom_density(colour = "blue")

# Bar chart of the number of detections per weekday.
# The title previously repeated "Botnet type distribution" from the malware
# chart; it now describes what is actually plotted.
# NOTE(review): DetectedWeekday's factor levels appear to be in alphabetical
# order (the str() output lists "Friday", "Monday", ...), so bars are not in
# calendar order — consider releveling upstream.
barplot(
  height = table(df.feo$DetectedWeekday),
  col = "orange",
  main = "Detections by weekday"
)

# Exhaustive view of detections by weekday and continent, coloured by malware.
# Point size encodes the number of rows at each combination (geom_count).
# Columns are mapped by bare name: writing df.feo$col inside aes() bypasses
# the `data` argument, makes ggplot2 emit a warning, and produces legend
# titles such as "df.feo$Malware".
g <- ggplot(
  df.feo,
  aes(x = DetectedWeekday, y = continent_name, color = Malware)
)
g +
  geom_count() +
  labs(title = "complete continent & weekday data") +
  xlab("weekday") +
  ylab("Continent Name")

# Attempt a k-means clustering to look for groups of related hosts.
# Only numeric/factor fields are used as features.
# NOTE(review): the features are not scaled; DstIP takes values around 1e9 in
# the printed cluster means, so it likely dominates the distance over
# latitude/longitude. Consider scale() — confirm intent before changing.
df.feo.features <- df.feo[, c("DstIP", "latitude", "longitude")]

# 4 clusters, up to 40 iterations per run, 5 random starts.
df.feo3 <- RcmdrMisc::KMeans(
  df.feo.features,
  centers = 4,
  iter.max = 40,
  num.seeds = 5
)

# Name the new column explicitly. An unnamed cbind() argument is labelled with
# its deparsed expression ("df.feo3$cluster"), which `df.feo2$cluster` cannot
# find.
df.feo2 <- cbind(df.feo, cluster = df.feo3$cluster)

# Print the fitted model summary.
df.feo3
## K-means clustering with 4 clusters of sizes 101, 39, 34, 174
## 
## Cluster means:
##        DstIP latitude  longitude
## 1 1435526338 38.93465  -1.915581
## 2  530735882 37.08945  -2.736110
## 3 2472830728 31.39852  12.865953
## 4 3202307007 12.83373 -38.162090
## 
## Clustering vector:
##   [1] 4 1 4 4 1 4 4 4 1 4 1 4 1 4 4 4 2 4 1 1 4 2 4 4 4 4 3 3 1 4 4 1 4 4 3
##  [36] 2 1 4 1 1 1 1 1 1 1 1 2 2 4 3 1 2 4 3 1 1 1 3 4 2 2 3 2 1 2 4 1 4 1 1
##  [71] 1 1 4 4 4 4 1 1 2 4 3 2 4 1 1 4 1 4 1 1 4 4 4 1 4 1 4 1 2 4 4 2 4 2 1
## [106] 1 3 2 1 4 4 4 1 3 4 4 4 4 4 4 4 1 4 1 4 4 4 4 4 4 4 4 4 4 1 1 2 4 4 4
## [141] 4 1 4 1 4 1 4 4 1 4 4 1 4 4 3 4 1 4 4 4 4 1 4 3 1 2 4 4 3 1 3 4 4 4 4
## [176] 4 4 1 4 3 1 4 4 4 1 1 4 4 4 3 4 1 1 4 4 4 2 4 3 3 4 1 4 2 4 3 4 1 1 4
## [211] 4 2 4 4 1 4 4 2 4 4 1 3 4 1 1 1 4 4 4 1 1 4 1 1 4 4 3 3 4 2 1 4 4 3 2
## [246] 4 1 4 1 4 4 4 4 1 4 4 2 3 2 4 2 4 4 4 4 1 4 3 1 4 1 2 4 1 1 4 4 3 4 2
## [281] 4 4 1 4 4 4 1 1 3 1 1 1 3 4 3 3 4 1 4 4 4 2 4 4 4 2 2 4 1 4 4 1 1 4 4
## [316] 1 4 4 1 1 1 2 2 2 2 2 4 4 3 4 4 2 4 2 3 1 4 4 4 3 4 4 1 1 1 3 4 1
## 
## Within cluster sum of squares by cluster:
## [1] 6.035829e+18 3.135394e+18 2.245560e+18 6.583097e+18
##  (between_SS / total_SS =  95.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# One data frame per k-means cluster. Rows are selected with the assignment
# vector stored in the fitted model (df.feo3$cluster), so the subsets do not
# depend on how the cluster column happens to be named in df.feo2.
df.feocluster1 <- df.feo[df.feo3$cluster == 1, ]
df.feocluster2 <- df.feo[df.feo3$cluster == 2, ]
df.feocluster3 <- df.feo[df.feo3$cluster == 3, ]
df.feocluster4 <- df.feo[df.feo3$cluster == 4, ]
# Results yet to be analyzed

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.